# Manual branding: palettes and theme are defined inline because the
# branding file won't load.
transcend_cols <- c("#1A4C81", "#59C3B4", "#EF464B", "#ADE0EE")
transcend_cols2 <- c(
  "#BC2582", "#FFA630", "#FFDE42", "#99C24D", "#218380", "#D3B7D7"
)
transcend_grays <- c("#4D4D4F", "#9D9FA2", "#D1D3D4")
# The middle gray doubles as the colour used for NA / secondary text.
transcend_na <- transcend_grays[2]

# Google-Docs base theme with the brand fonts and black axis text layered on
# top; installed as the session default by theme_set() below.
theme_transcend <- theme_gdocs(base_size = 14, base_family = "Open Sans") +
  theme(
    plot.title = element_text(family = "Bebas Neue", color = "black"),
    plot.background = element_blank(),
    axis.text = element_text(colour = "black"),
    axis.title = element_text(colour = "black"),
    panel.border = element_rect(colour = "#4D4D4F"),
    strip.text = element_text(size = rel(0.8)),
    plot.margin = margin(10, 24, 10, 10, "pt")
  )
theme_set(theme_transcend)

Future resources

Level of concern

Guiding question: How concerned are school leaders about sustainability over the next 5 years?

The majority (61%) of Canopy school leaders reported feeling concerned about sustainability over the next 5 years. By contrast, only around 20% of school leaders indicated they were not concerned about sustainability.

# Share of schools giving each answer to the future-sustainability question.
variables %>%
  count(future_resources) %>%
  # The denominator is derived from the data (every row of `variables`;
  # missing answers are only dropped afterwards, as before) instead of the
  # previously hard-coded 189, so the shares stay correct if the sample
  # changes. NOTE(review): assumes 189 was the row count of `variables`.
  mutate(pct = n / sum(n)) %>%
  filter(!is.na(future_resources)) %>%
  mutate(
    group = case_when(
      future_resources %in% c("Yes, extremely", "Yes, somewhat") ~ "Concerned",
      future_resources %in% c("No, not at all", "No, not very") ~ "Not concerned",
      TRUE ~ "Neutral"
    ),
    # Fix the bar order from most to least concerned.
    future_resources = factor(
      future_resources,
      levels = c("Yes, extremely", "Yes, somewhat", "Neutral", "No, not very", "No, not at all")
    )
  ) %>%
  ggplot(aes(future_resources, pct, fill = group)) +
  geom_col() +
  # Unnamed colours are assigned to the fill groups in their sorted order
  # (Concerned, Neutral, Not concerned).
  scale_fill_manual(values = c(transcend_cols[3], transcend_grays[1], transcend_cols[1])) +
  scale_y_continuous(expand = c(0, 0), limits = c(0, 1), labels = scales::percent_format()) +
  theme(legend.position = "none", panel.grid.major.x = element_blank()) +
  labs(x = "", y = "Percent of schools", title = "School leader concern with future sustainability") +
  # Percentage label sitting just above each bar.
  geom_text(
    aes(label = scales::label_percent(accuracy = 1)(pct)),
    nudge_y = 0.01,
    vjust = 0,
    color = transcend_na,
    fontface = "bold",
    size = 5.5,
    family = "sans"
  ) +
  scale_x_discrete(labels = wrap_format(10))

Guiding question: Are certain types of schools more/less likely to be concerned about resources?

Suburban schools were least concerned about future sustainability while schools serving rural or multiple geographic regions held the highest levels of concern.

# Level of concern within each school locale (stacked shares, flipped bars).
variables %>%
  select(school_id, future_resources, school_locale) %>%
  # Drop schools with no answer first: otherwise a missing future_resources
  # falls through to the `TRUE ~ "Neutral"` branch and inflates "Neutral".
  # Rows with no locale are dropped here too; they were previously removed
  # after the per-locale shares were computed, which gives the same result.
  filter(!is.na(future_resources), !is.na(school_locale)) %>%
  mutate(concern = case_when(
    future_resources %in% c("Yes, extremely", "Yes, somewhat") ~ "Concerned",
    future_resources %in% c("No, not at all", "No, not very") ~ "Not concerned",
    TRUE ~ "Neutral"
  )) %>%
  count(concern, school_locale) %>%
  # Shares are computed within each locale so every bar sums to 100%.
  group_by(school_locale) %>%
  mutate(pct = n / sum(n)) %>%
  ungroup() %>%
  mutate(school_locale = factor(school_locale, levels = c("Multiple", "Rural", "Urban", "Suburban"))) %>%
  ggplot(aes(school_locale, pct, fill = concern)) +
  geom_col() +
  scale_fill_manual(values = c(transcend_cols[3], transcend_na, transcend_cols[1])) +
  scale_y_continuous(expand = c(0, 0), limits = c(0, 1), labels = scales::percent_format()) +
  # Labels are placed near the outer end of each stacked segment.
  geom_text(
    aes(label = scales::label_percent(accuracy = 1)(pct)),
    position = position_stack(vjust = .95),
    hjust = 1,
    color = "white",
    size = 4,
    family = "sans"
  ) +
  theme(
    legend.position = "bottom",
    legend.direction = "horizontal",
    panel.grid.major.y = element_blank(),
    axis.text.y = element_text(size = 11)
  ) +
  labs(x = "", y = "Percent of schools", title = "School leader concern with \nfuture sustainability", fill = "Level of concern") +
  coord_flip()

Independent schools had the least concern about future sustainability, while public charter schools had the most concern.

# Level of concern within each school governance type (stacked shares).
variables %>%
  select(school_id, future_resources, school_type) %>%
  # Drop schools with no answer first: otherwise a missing future_resources
  # falls through to the `TRUE ~ "Neutral"` branch and inflates "Neutral".
  # Rows with no school type are dropped here too; they were previously
  # removed after the per-type shares were computed, which is equivalent.
  filter(!is.na(future_resources), !is.na(school_type)) %>%
  mutate(concern = case_when(
    future_resources %in% c("Yes, extremely", "Yes, somewhat") ~ "Concerned",
    future_resources %in% c("No, not at all", "No, not very") ~ "Not concerned",
    TRUE ~ "Neutral"
  )) %>%
  count(concern, school_type) %>%
  # Shares are computed within each school type so every bar sums to 100%.
  group_by(school_type) %>%
  mutate(pct = n / sum(n)) %>%
  ungroup() %>%
  mutate(school_type = factor(school_type, levels = c("Public charter school", "Public district school", "Independent (private) school"))) %>%
  ggplot(aes(school_type, pct, fill = concern)) +
  geom_col() +
  scale_fill_manual(values = c(transcend_cols[3], transcend_na, transcend_cols[1])) +
  scale_y_continuous(expand = c(0, 0), limits = c(0, 1), labels = scales::percent_format()) +
  # Labels are placed near the outer end of each stacked segment.
  geom_text(
    aes(label = scales::label_percent(accuracy = 1)(pct)),
    position = position_stack(vjust = .95),
    hjust = 1,
    color = "white",
    size = 5,
    family = "sans"
  ) +
  theme(
    legend.position = "bottom",
    legend.direction = "horizontal",
    panel.grid.major.y = element_blank(),
    axis.text.y = element_text(size = 11)
  ) +
  labs(x = "", y = "Percent of schools", title = "School leader concern with \nfuture sustainability", fill = "Level of concern") +
  scale_x_discrete(labels = wrap_format(20)) +
  coord_flip()

School leaders across grade bands reported similar levels of concern with future sustainability.

# Level of concern within each grade band. A school can serve several bands,
# so the grades_* indicator columns are summed per concern level and then
# reshaped to one row per (concern, band).
variables %>%
  select(school_id, future_resources, grades_pk, grades_elementary, grades_middle, grades_high) %>%
  # Drop schools with no answer first: otherwise a missing future_resources
  # falls through to the `TRUE ~ "Neutral"` branch and inflates "Neutral".
  filter(!is.na(future_resources)) %>%
  mutate(concern = case_when(
    future_resources %in% c("Yes, extremely", "Yes, somewhat") ~ "Concerned",
    future_resources %in% c("No, not at all", "No, not very") ~ "Not concerned",
    TRUE ~ "Neutral"
  )) %>%
  group_by(concern) %>%
  summarize(
    PK = sum(grades_pk),
    Elementary = sum(grades_elementary),
    Middle = sum(grades_middle),
    High = sum(grades_high)
  ) %>%
  ungroup() %>%
  pivot_longer(
    cols = c(PK, Elementary, Middle, High),
    names_to = "school_level",
    values_to = "n"
  ) %>%
  # Shares are computed within each grade band so every bar sums to 100%.
  group_by(school_level) %>%
  mutate(pct = n / sum(n)) %>%
  ungroup() %>%
  mutate(school_level = factor(school_level, levels = c("Elementary", "High", "Middle", "PK"))) %>%
  ggplot(aes(school_level, pct, fill = concern)) +
  geom_col() +
  scale_fill_manual(values = c(transcend_cols[3], transcend_na, transcend_cols[1])) +
  scale_y_continuous(expand = c(0, 0), limits = c(0, 1), labels = scales::percent_format()) +
  # Labels are placed near the outer end of each stacked segment.
  geom_text(
    aes(label = scales::label_percent(accuracy = 1)(pct)),
    position = position_stack(vjust = .95),
    hjust = 1,
    color = "white",
    size = 5,
    family = "sans"
  ) +
  theme(
    legend.position = "bottom",
    legend.direction = "horizontal",
    panel.grid.major.y = element_blank(),
    axis.text.y = element_text(size = 11)
  ) +
  labs(x = "", y = "Percent of schools", title = "School leader concern with \nfuture sustainability", fill = "Level of concern") +
  coord_flip()

Schools reporting additional school descriptors had higher levels of concern about future sustainability, particularly hybrid schools, virtual schools, and homeschooling cooperatives. However, the number of schools in each of these categories is much smaller than for other categories, so these estimates are less stable. For instance, only 3 schools in our sample are homeschooling cooperatives; the proportion of concern is accurate, but we should be careful about how much emphasis we place on this finding, as it represents only a tiny fraction of our sample.

# Level of concern within each additional school descriptor. The
# school_desc_* indicator columns are summed per concern level and then
# reshaped to one row per (concern, descriptor).
variables %>%
  select(school_id, future_resources, starts_with("school_desc"), -school_desc_other_text) %>%
  # Drop schools with no answer first: otherwise a missing future_resources
  # falls through to the `TRUE ~ "Neutral"` branch and inflates "Neutral".
  filter(!is.na(future_resources)) %>%
  mutate(concern = case_when(
    future_resources %in% c("Yes, extremely", "Yes, somewhat") ~ "Concerned",
    future_resources %in% c("No, not at all", "No, not very") ~ "Not concerned",
    TRUE ~ "Neutral"
  )) %>%
  group_by(concern) %>%
  summarize(
    `Homeschooling cooperative` = sum(school_desc_homeschool),
    `Hybrid school` = sum(school_desc_hybrid),
    Microschool = sum(school_desc_micro),
    `Part time school` = sum(school_desc_part_time),
    `School within school` = sum(school_desc_sws),
    `Virtual school` = sum(school_desc_virtual),
    Other = sum(school_desc_other)
  ) %>%
  ungroup() %>%
  pivot_longer(
    cols = !concern,
    names_to = "school_desc",
    values_to = "n"
  ) %>%
  # Shares are computed within each descriptor so every bar sums to 100%.
  group_by(school_desc) %>%
  mutate(pct = n / sum(n)) %>%
  ungroup() %>%
  mutate(school_desc = factor(school_desc, levels = c("Virtual school", "Hybrid school", "Homeschooling cooperative", "Other", "Part time school", "School within school", "Microschool"))) %>%
  ggplot(aes(school_desc, pct, fill = concern)) +
  geom_col() +
  scale_fill_manual(values = c(transcend_cols[3], transcend_na, transcend_cols[1])) +
  scale_y_continuous(expand = c(0, 0), limits = c(0, 1), labels = scales::percent_format()) +
  theme(
    legend.position = "bottom",
    legend.direction = "horizontal",
    panel.grid.major.y = element_blank(),
    axis.text.y = element_text(size = 11)
  ) +
  # Labels are placed near the outer end of each stacked segment.
  geom_text(
    aes(label = scales::label_percent(accuracy = 1)(pct)),
    position = position_stack(vjust = .95),
    hjust = 1,
    color = "white",
    size = 5,
    family = "sans"
  ) +
  labs(x = "", y = "Percent of schools", title = "School leader concern with \nfuture sustainability", fill = "Level of concern") +
  scale_x_discrete(labels = wrap_format(20)) +
  coord_flip()

In the figure below, I ran a logistic regression model to estimate the likelihood that a school leader indicated they were concerned with future sustainability, using school characteristics as additional covariates. Note that I transformed the future_resources variable into a binary, with 1 indicating concern (“Yes, extremely” or “Yes, somewhat”). I’m open to feedback about alternate modeling choices–I’m less familiar with multivariate approaches and was not entirely sure how to set it up or if it was a preferable approach.

Schools that are predominantly BIPOC-led were far more likely to indicate they were concerned about future sustainability, more than 20x more likely than schools with 0-24% leaders of color. Elementary schools, charter schools, and rural schools or schools serving students in more than one geographic locale were all also more likely to indicate they were concerned about future sustainability.

# prep data
# Modelling frame: binary concern outcome, factor covariates with explicit
# reference levels (the first level of each factor), and z-scored enrollment
# and pct_* covariates.
mod_dat <- variables %>%
  select(
    school_id, future_resources, school_locale, school_type,
    grades_pk, grades_elementary, grades_middle, grades_high,
    school_enrollment, pct_bipoc, pct_ell, pct_frpl, pct_swd,
    teaching_diversity, leadership_diversity
  ) %>%
  mutate(
    future_resources = case_when(
      # Keep missing answers missing so glm() drops them, rather than
      # silently coding a non-response as 0 ("not concerned").
      is.na(future_resources) ~ NA_real_,
      future_resources %in% c("Yes, extremely", "Yes, somewhat") ~ 1,
      TRUE ~ 0
    ),
    # Reword the raw category text so it matches the factor levels below.
    teaching_diversity = gsub("people", "teachers", teaching_diversity),
    leadership_diversity = gsub("people", "leaders", leadership_diversity),
    school_locale = factor(school_locale, levels = c("Urban", "Suburban", "Rural", "Multiple")),
    school_type = factor(school_type, levels = c("Public district school", "Public charter school", "Independent (private) school")),
    teaching_diversity = factor(teaching_diversity, levels = c("0 - 24% teachers of color", "25 - 49% teachers of color", "50 - 74% teachers of color", "75 - 100% teachers of color")),
    leadership_diversity = factor(leadership_diversity, levels = c("0 - 24% leaders of color", "25 - 49% leaders of color", "50 - 74% leaders of color", "75 - 100% leaders of color")),
    school_enrollment = as.numeric(scale(school_enrollment, center = TRUE, scale = TRUE))
  ) %>%
  mutate(across(starts_with("pct"), ~ as.numeric(scale(., center = TRUE, scale = TRUE))))
# model
# Binomial GLM of the binary concern indicator on school characteristics.
mod1 <- glm(
  future_resources ~
    school_locale + school_type +
    grades_pk + grades_elementary + grades_middle + grades_high +
    school_enrollment +
    pct_bipoc + pct_ell + pct_frpl + pct_swd +
    teaching_diversity + leadership_diversity,
  data = mod_dat,
  family = "binomial"
)
# set labels
# Lookup from model term names (covariate name + factor level) to the display
# labels used on the coefficient plots. Entries are looked up by name, so
# their order here is only for readability.
cov_labels <- c(
  # locale (reference: Urban)
  "school_localeSuburban" = "Suburban",
  "school_localeRural" = "Rural",
  "school_localeMultiple" = "Multiple locales",
  # governance (reference: Public district school)
  "school_typePublic charter school" = "Public charter school",
  "school_typeIndependent (private) school" = "Independent (private) school",
  # grade bands
  "grades_pk" = "PreK",
  "grades_elementary" = "Elementary",
  "grades_middle" = "Middle",
  "grades_high" = "High",
  # enrollment and student demographics
  "school_enrollment" = "School Enrollment",
  "pct_bipoc" = "% BIPOC students",
  "pct_ell" = "% EL-designated students",
  "pct_frpl" = "% FRPL-eligible",
  "pct_swd" = "% Students with disabilities",
  # staff diversity (reference: 0 - 24%)
  "leadership_diversity25 - 49% leaders of color" = "25-49% leaders of color",
  "leadership_diversity50 - 74% leaders of color" = "50-74% leaders of color",
  "leadership_diversity75 - 100% leaders of color" = "75-100% leaders of color",
  "teaching_diversity25 - 49% teachers of color" = "25-49% teachers of color",
  "teaching_diversity50 - 74% teachers of color" = "50-74% teachers of color",
  "teaching_diversity75 - 100% teachers of color" = "75-100% teachers of color"
)
# plot
# Exponentiated coefficients (odds ratios) from mod1 on a log axis. The blue
# range is exp(estimate +/- 1 standard error), i.e. narrower than a 95%
# confidence interval.
# `effects = "ran_pars"` (a mixed-model option that has no meaning for a glm)
# and `conf.int = TRUE` (whose conf.low/conf.high columns were never used)
# have been dropped from the tidy() call; the plotted values are unchanged.
tidy(mod1) %>%
  filter(term != "(Intercept)") %>%
  mutate(
    exp_est = exp(estimate),
    exp_min = exp(estimate - std.error),
    exp_max = exp(estimate + std.error),
    # Swap raw term names for display labels.
    term = cov_labels[term]
  ) %>%
  ggplot(aes(y = fct_reorder(term, exp_est), x = exp_est)) +
  geom_linerange(
    aes(xmin = exp_min, xmax = exp_max),
    color = "blue"
  ) +
  geom_point() +
  # Reference line at 1 = no difference from the reference category.
  geom_vline(xintercept = 1) +
  scale_x_continuous(
    trans = "log",
    breaks = c(.0625, .2, .5, 1, 2, 5, 16),
    labels = str_wrap(c("1/16 as likely", "1/5 as likely", "1/2 as likely", "Even", "2x as likely", "5x as likely", "16x as likely"), 10),
    expand = expansion(0, 0)
  ) +
  theme_transcend +
  theme(panel.grid.major.y = element_blank()) +
  labs(
    x = "Likelihood",
    y = "",
    title = str_wrap("School characteristics predicting higher concern with future sustainability", 60)
  )

Cause for concern

Guiding question: For leaders who were concerned about future sustainability, what reasons did they cite for their concern?

School leaders expressed the most concern over funding, including availability of local funds and private funding from foundations or donors. They were also concerned about staffing shortages and inflation/increasing prices, to a slightly smaller extent.

# nice labels
# One row per barrier_* indicator column (the free-text column is excluded),
# paired with the display label shown on the plots.
barrier_labs <- variables %>%
  select(starts_with("barrier"), -barrier_other_text) %>%
  pivot_longer(
    cols = starts_with("barrier"),
    names_to = "barrier",
    values_to = "response"
  ) %>%
  # Keep each column name once, in order of first appearance.
  distinct(barrier) %>%
  mutate(
    label = case_when(
      barrier == "barrier_local_funds" ~ "Availability of local funds",
      barrier == "barrier_private_funds" ~ "Availability of private funding from foundations or donors",
      barrier == "barrier_donations" ~ "Changes to in-kind donations",
      barrier == "barrier_inflation" ~ "Inflation/increasing prices",
      barrier == "barrier_enrollment" ~ "Changes in school enrollment",
      barrier == "barrier_shortage" ~ "Staffing shortages",
      barrier == "barrier_federal_funds" ~ "Expiration of federal relief funds",
      barrier == "barrier_other" ~ "Some other reason"
    )
  )
# plot
# Number of concerned schools selecting each barrier, sorted by count.
variables %>%
  select(school_id, future_resources, starts_with("barrier"), -barrier_other_text) %>%
  # Only leaders who said they were concerned.
  filter(future_resources %in% c("Yes, extremely", "Yes, somewhat")) %>%
  pivot_longer(
    cols = starts_with("barrier"),
    names_to = "barrier",
    values_to = "response"
  ) %>%
  group_by(barrier) %>%
  summarize(n = sum(response)) %>%
  left_join(barrier_labs, by = "barrier") %>%
  ggplot(aes(reorder(label, n), n)) +
  geom_col(fill = transcend_cols[1]) +
  theme(
    legend.position = "none",
    panel.grid.major.y = element_blank(),
    axis.text.y = element_text(size = 11)
  ) +
  scale_y_continuous(expand = c(0, 0), limits = c(0, 95)) +
  scale_x_discrete(labels = wrap_format(35)) +
  # Count label just past the end of each bar.
  geom_text(
    aes(label = n),
    nudge_y = 0.5,
    hjust = 0,
    color = transcend_na,
    fontface = "bold",
    size = 5.5,
    family = "sans"
  ) +
  coord_flip() +
  labs(
    x = "",
    y = "",
    title = "Reasons for concern over future sustainability"
  )

The following graph provides the same information, with the percentage of schools that selected each reason from those who indicated they were concerned (N = 115) rather than a raw number.

# Share of concerned schools selecting each barrier, sorted by share.
variables %>%
  select(school_id, future_resources, starts_with("barrier"), -barrier_other_text) %>%
  # Only leaders who said they were concerned.
  filter(future_resources %in% c("Yes, extremely", "Yes, somewhat")) %>%
  pivot_longer(
    cols = starts_with("barrier"),
    names_to = "barrier",
    values_to = "response"
  ) %>%
  group_by(barrier) %>%
  summarize(
    n = sum(response),
    # After the filter each barrier group has one row per concerned school,
    # so the mean of the indicator is the share selecting it. This replaces
    # the hard-coded `n / 115` and stays correct if the sample changes.
    # Assumes every barrier_* column is a 0/1 indicator, as sum() above does.
    pct = mean(response)
  ) %>%
  left_join(barrier_labs, by = "barrier") %>%
  ggplot(aes(reorder(label, pct), pct)) +
  geom_col(fill = transcend_cols[1]) +
  theme(
    legend.position = "none",
    panel.grid.major.y = element_blank(),
    axis.text.y = element_text(size = 11)
  ) +
  scale_y_continuous(expand = c(0, 0), limits = c(0, 1), labels = scales::percent_format()) +
  scale_x_discrete(labels = wrap_format(35)) +
  # Percentage label at the end of each bar.
  geom_text(
    aes(label = scales::label_percent(accuracy = 1)(pct)),
    hjust = 0,
    color = transcend_na,
    fontface = "bold",
    size = 5.5,
    family = "sans"
  ) +
  coord_flip() +
  labs(
    x = "",
    y = "",
    title = "Reasons for concern over future sustainability"
  )

Of leaders who expressed concern over future sustainability, 15 chose a reason that was not listed and provided a written response describing an additional factor that may hinder their ability to sustain adequate resources for their school in the next five years. I created a word cloud (thanks for the inspiration and code, Anwesha!) but it wasn’t especially interesting.

# Pull open response answers (concerned leaders who wrote something in)
responses <- variables %>%
  select(future_resources, barrier_other_text) %>%
  filter(future_resources %in% c("Yes, extremely", "Yes, somewhat")) %>%
  filter(!is.na(barrier_other_text))

# Add more stop words
custom_stopwords <- c("yes", "somewhat", "extremely", "school", "schools", "funding", "also", "will", "add", "state", "education", "community")

# Create a text corpus
# Build the corpus from the free-text column only, one document per response.
# VectorSource() treats each element of its input as a document, so passing
# the whole `responses` data frame would turn each *column* into a document
# and mix the future_resources answers into the word counts.
corpus <- Corpus(VectorSource(responses$barrier_other_text))

# Text preprocessing
corpus <- tm_map(corpus, content_transformer(tolower))  # Convert to lower case
corpus <- tm_map(corpus, removePunctuation)             # Remove punctuation
corpus <- tm_map(corpus, removeNumbers)                 # Remove numbers
corpus <- tm_map(corpus, removeWords, c(stopwords("english"), custom_stopwords))  # Remove stopwords
corpus <- tm_map(corpus, stripWhitespace)               # Strip whitespace

# Create a term-document matrix (terms in rows, responses in columns)
dtm <- TermDocumentMatrix(corpus)

# Convert the matrix to a dataframe of total frequency per word
matrix <- as.matrix(dtm)
word_freqs <- sort(rowSums(matrix), decreasing = TRUE)
data <- data.frame(word = names(word_freqs), freq = word_freqs)

# Generate the wordcloud
set.seed(1234) # For reproducibility
wordcloud(
  words = data$word, freq = data$freq, min.freq = 1,
  max.words = 200, random.order = FALSE, rot.per = 0.35,
  colors = brewer.pal(8, "Dark2")
)

Here are the original responses:

# Interactive table of the raw write-in responses.
responses %>% datatable()

Guiding question: Did the reason for concern vary by school characteristics?

Generally, yes. How school characteristics differed by rationale provided for concern varies by the specific factor of interest, but some high-level takeaways seem to be that leadership diversity and geographic region are playing a large role in the variance, and to a lesser extent, school level and school governance. Predominantly BIPOC-led schools with at least 75% of leaders identifying as a person of color were more likely to select every factor as something that may hinder their ability to sustain adequate resources for their school in the next 5 years.

Note that for this set of models I used a logistic regression model to calculate the likelihood of schools with certain characteristics to select a given factor. School leaders were able to select as many factors as they’d like, so I treated them as independent answers/outcomes using dummy codes.

# model function
#' Fit a logistic regression of a dummy outcome on school characteristics and
#' plot the exponentiated coefficients.
#'
#' Reads the global data frame `variables`, the label lookup `cov_labels`,
#' and the theme `theme_transcend`.
#'
#' @param outcome Dummy-coded vector with one value per row of `variables`,
#'   in the same row order (callers pass e.g. `variables$barrier_local_funds`).
#' @param title Plot title; wrapped at 60 characters.
#' @return A ggplot object: one point per model term (intercept excluded) at
#'   exp(estimate), with a range of exp(estimate +/- 1 standard error).
log_model <- function(outcome, title) {
  # The outcome is attached to the model frame by position, so it has to
  # line up with the rows of `variables`.
  stopifnot(length(outcome) == nrow(variables))

  # prep data
  # Previously the outcome vector was embraced inside select(), where its
  # 0/1 values were read as column positions; it is now added explicitly as
  # the `outcome` column.
  data <- variables %>%
    select(
      school_id, school_locale, school_type,
      grades_pk, grades_elementary, grades_middle, grades_high,
      school_enrollment, pct_bipoc, pct_ell, pct_frpl, pct_swd,
      teaching_diversity, leadership_diversity
    ) %>%
    mutate(
      outcome = outcome,
      # Reword the raw category text so it matches the factor levels below.
      teaching_diversity = gsub("people", "teachers", teaching_diversity),
      leadership_diversity = gsub("people", "leaders", leadership_diversity),
      # The first level of each factor is the reference category.
      school_locale = factor(school_locale, levels = c("Urban", "Suburban", "Rural", "Multiple")),
      school_type = factor(school_type, levels = c("Public district school", "Public charter school", "Independent (private) school")),
      teaching_diversity = factor(teaching_diversity, levels = c("0 - 24% teachers of color", "25 - 49% teachers of color", "50 - 74% teachers of color", "75 - 100% teachers of color")),
      leadership_diversity = factor(leadership_diversity, levels = c("0 - 24% leaders of color", "25 - 49% leaders of color", "50 - 74% leaders of color", "75 - 100% leaders of color")),
      school_enrollment = as.numeric(scale(school_enrollment, center = TRUE, scale = TRUE))
    ) %>%
    mutate(across(starts_with("pct"), ~ as.numeric(scale(., center = TRUE, scale = TRUE))))

  # model
  mod <- glm(
    outcome ~ school_locale + school_type + grades_pk + grades_elementary + grades_middle + grades_high + school_enrollment + pct_bipoc + pct_ell + pct_frpl + pct_swd + teaching_diversity + leadership_diversity,
    family = "binomial",
    data = data
  )

  # plot
  # `effects = "ran_pars"` (a mixed-model option) and `conf.int = TRUE`
  # (whose columns were never used) have been dropped from tidy(); the
  # plotted range remains +/- 1 standard error, not a 95% interval.
  tidy(mod) %>%
    filter(term != "(Intercept)") %>%
    mutate(
      exp_est = exp(estimate),
      exp_min = exp(estimate - std.error),
      exp_max = exp(estimate + std.error),
      # Swap raw term names for display labels.
      term = cov_labels[term]
    ) %>%
    ggplot(aes(y = fct_reorder(term, exp_est), x = exp_est)) +
    geom_linerange(
      aes(xmin = exp_min, xmax = exp_max),
      color = "blue"
    ) +
    geom_point() +
    # Reference line at 1 = no difference from the reference category.
    geom_vline(xintercept = 1) +
    scale_x_continuous(
      trans = "log",
      breaks = c(.0625, .25, .5, 1, 2, 4, 16),
      labels = str_wrap(c("1/16 as likely", "1/4 as likely", "1/2 as likely", "Even", "2x as likely", "4x as likely", "16x as likely"), 10),
      expand = expansion(0, 0)
    ) +
    theme_transcend +
    theme(panel.grid.major.y = element_blank()) +
    labs(
      x = "",
      y = "",
      title = str_wrap(title, 60)
    )
}
# One coefficient plot per barrier indicator.
log_model(
  outcome = variables$barrier_local_funds,
  title = "School characteristics predicting higher concern with availability of local funds"
)

log_model(
  outcome = variables$barrier_private_funds,
  title = "School characteristics predicting higher concern with availability of private funds"
)

log_model(
  outcome = variables$barrier_donations,
  title = "School characteristics predicting higher concern with changes to in-kind donations"
)

log_model(
  outcome = variables$barrier_inflation,
  title = "School characteristics predicting higher concern with inflation/increasing prices"
)

log_model(
  outcome = variables$barrier_enrollment,
  title = "School characteristics predicting higher concern with changes in school enrollment"
)

log_model(
  outcome = variables$barrier_shortage,
  title = "School characteristics predicting higher concern with staffing shortages"
)

log_model(
  outcome = variables$barrier_federal_funds,
  title = "School characteristics predicting higher concern with expiration of federal relief funds"
)

Future transformations

Guiding question: Which factors do school leaders think will be most transformative for schools in the next 5 years?

The top 3 factors school leaders cited that they expect will be most transformative for K-12 schooling in the next 5 years are teacher workforce issues, AI, and the ongoing mental health crisis. I was surprised to see politics/culture wars and pandemic learning loss so low on the list given recent conversations with leaders!

# One row per transform_* indicator column (the free-text column is
# excluded), paired with the display label shown on the plots.
transform_labels <- variables %>%
  select(starts_with("transform"), -transform_other_text) %>%
  pivot_longer(
    cols = starts_with("transform"),
    names_to = "factor",
    values_to = "response"
  ) %>%
  # Keep each column name once, in order of first appearance.
  distinct(factor) %>%
  mutate(
    label = case_when(
      factor == "transform_ai" ~ "Artificial intelligence",
      factor == "transform_mental_health" ~ "Mental health crisis",
      factor == "transform_pandemic" ~ "Pandemic learning loss",
      factor == "transform_workforce" ~ "Teacher workforce issues",
      factor == "transform_enrollment" ~ "Changes in school enrollment",
      factor == "transform_politics" ~ "Politics/culture wars",
      factor == "transform_esa" ~ "ESAs or voucher policies",
      factor == "transform_climate" ~ "Climate change",
      factor == "transform_other" ~ "Some other reason"
    )
  )
# Number of schools selecting each transformative factor, sorted by count.
variables %>%
  select(school_id, future_resources, starts_with("transform"), -transform_other_text) %>%
  pivot_longer(
    cols = starts_with("transform"),
    names_to = "factor",
    values_to = "response"
  ) %>%
  group_by(factor) %>%
  summarize(n = sum(response)) %>%
  left_join(transform_labels, by = "factor") %>%
  ggplot(aes(reorder(label, n), n)) +
  geom_col(fill = transcend_cols[1]) +
  theme(
    legend.position = "none",
    panel.grid.major.y = element_blank(),
    axis.text.y = element_text(size = 11)
  ) +
  scale_y_continuous(expand = c(0, 0), limits = c(0, 150)) +
  scale_x_discrete(labels = wrap_format(35)) +
  # Count label just past the end of each bar.
  geom_text(
    aes(label = n),
    nudge_y = 0.5,
    hjust = 0,
    color = transcend_na,
    fontface = "bold",
    size = 5.5,
    family = "sans"
  ) +
  coord_flip() +
  labs(
    x = "",
    y = "",
    title = str_wrap("Factors school leaders think will be most transformative in the next 5 years", 60)
  )

15 school leaders chose to provide an open response to the question, citing some other factor that was not on our list.

# Pull open response answers
responses <- variables %>%
  select(transform_other_text) %>%
  filter(!is.na(transform_other_text))

# Create a text corpus
# Build the corpus from the text column itself, one document per response.
# VectorSource() treats each element of its input as a document, so passing
# the one-column `responses` data frame would yield a single document made
# from the whole column rather than one per response.
corpus <- Corpus(VectorSource(responses$transform_other_text))

# Text preprocessing
corpus <- tm_map(corpus, content_transformer(tolower))  # Convert to lower case
corpus <- tm_map(corpus, removePunctuation)             # Remove punctuation
corpus <- tm_map(corpus, removeNumbers)                 # Remove numbers
corpus <- tm_map(corpus, removeWords, c(stopwords("english"), custom_stopwords))  # Remove stopwords
corpus <- tm_map(corpus, stripWhitespace)               # Strip whitespace

# Create a term-document matrix (terms in rows, responses in columns)
dtm <- TermDocumentMatrix(corpus)

# Convert the matrix to a dataframe of total frequency per word
matrix <- as.matrix(dtm)
word_freqs <- sort(rowSums(matrix), decreasing = TRUE)
data <- data.frame(word = names(word_freqs), freq = word_freqs)

# Generate the wordcloud
set.seed(1234) # For reproducibility
wordcloud(
  words = data$word, freq = data$freq, min.freq = 1,
  max.words = 200, random.order = FALSE, rot.per = 0.35,
  colors = brewer.pal(8, "Dark2")
)

# Interactive table of the raw write-in responses.
responses %>% datatable()

Guiding question: Do the top factors school leaders think will be most transformative for schools in the next 5 years differ by school characteristics?

Yes - there were definitely differences in the characteristics more/less likely to select certain factors, but some of these should be interpreted with caution. Maybe only focus on the top 3 reasons (teacher workforce issues, AI, mental health crisis) which had larger sample sizes.

Note that I used the same logistic regression models as above. I am getting warning messages for both sets of models indicating that there is perfect separation resulting in predicted probabilities of 0 or 1. Should I apply some kind of penalty to rein in large coefficients or remove some of the predictors? Sample size is a definite issue with some of these factors that were not highly selected, which you can see easily with the large confidence intervals. Be careful with any interpretation of those findings.

Quick overview table as a reminder:

# Interactive table: number of schools selecting each transformative factor.
variables %>%
  select(school_id, starts_with("transform"), -transform_other_text) %>%
  pivot_longer(
    cols = starts_with("transform"),
    names_to = "factor",
    values_to = "response"
  ) %>%
  group_by(factor) %>%
  summarize(n = sum(response)) %>%
  datatable()
# Coefficient plot for citing AI as a transformative factor.
log_model(
  outcome = variables$transform_ai,
  title = "School characteristics predicting citing AI as a factor they predict will shape K-12 education in the next 5 years"
)

Really surprised to see PK schools here!

# Coefficient plots for the mental-health, pandemic, workforce and
# enrollment factors.
log_model(
  outcome = variables$transform_mental_health,
  title = "School characteristics predicting citing the mental health crisis as a factor they predict will shape K-12 education in the next 5 years"
)

log_model(
  outcome = variables$transform_pandemic,
  title = "School characteristics predicting citing pandemic learning loss as a factor they predict will shape K-12 education in the next 5 years"
)

log_model(
  outcome = variables$transform_workforce,
  title = "School characteristics predicting citing teacher workforce issues as a factor they predict will shape K-12 education in the next 5 years"
)

log_model(
  outcome = variables$transform_enrollment,
  title = "School characteristics predicting citing changes in school enrollment as a factor they predict will shape K-12 education in the next 5 years"
)

Not super shocked by this, but interesting to see that schools with higher proportions of leaders of color and teachers of color were much more likely to cite politics/culture wars as a factor.

# Coefficient plot for citing politics/culture wars.
log_model(
  outcome = variables$transform_politics,
  title = "School characteristics predicting citing politics/culture wars as a factor they predict will shape K-12 education in the next 5 years"
)

Got a huge coefficient for private schools here–150x more likely to have chosen this factor! Only 22 schools selected this factor, and 14 of them were private schools.

# Coefficient plot for citing ESAs and vouchers.
log_model(
  outcome = variables$transform_esa,
  title = "School characteristics predicting citing Education Savings Accounts and vouchers as a factor they predict will shape K-12 education in the next 5 years"
)

# quick check: number of schools citing ESAs/vouchers, by school type
variables %>%
  group_by(school_type) %>%
  summarize(n = sum(transform_esa))
## # A tibble: 3 × 2
##   school_type                      n
##   <chr>                        <int>
## 1 Independent (private) school    14
## 2 Public charter school            3
## 3 Public district school           5

Only 10 schools selected climate change as a factor, leading to really unstable modeling results.

# Coefficient plot for citing climate change.
log_model(
  outcome = variables$transform_climate,
  title = "School characteristics predicting citing climate change as a factor they predict will shape K-12 education in the next 5 years"
)